* Reset settings and initialize log file
* NOTE(review): launch is a project-defined command that is not shown in this file;
* the description above and the meaning of path() should be confirmed against its definition.
launch, path("build/classify_states")

*-------------------------------------------------------------------------------
* Price and Wasserman (2024), "The Summer Drop in Female Employment"
*
* Description: Classify states as having early/late/mixed school closures.
*-------------------------------------------------------------------------------


* Fraction of 16-year-olds enrolled in high school, by state and calendar month
* (weighted by wtfinl). The result is parked in a tempfile for the later merge.
tempfile enrollment
gzuse "$basepath/data/derived/cps_bms_sample_allages.dta.gz", clear
drop if age != 16
gen byte students = school_status == 1
gcollapse (mean) students [pw = wtfinl], by(state_fips month)
save `enrollment'

* Weighted number of teachers reporting to work in K-12 schools, by state and month
gzuse "$basepath/data/derived/cps_bms_sample_allages.dta.gz", clear
keep if ind1990 == 842
keep if inrange(occ1990, 155, 159)
keep if empstat == 10
gcollapse (sum) teachers = wtfinl, by(state_fips month)

* Attach the enrollment shares; every state-month must be present in both files
merge 1:1 state_fips month using `enrollment', nogenerate assert(match)

* Restrict to May-July and lay the months side by side (one row per state)
drop if !inlist(month, 5, 6, 7)
reshape wide students teachers, i(state_fips) j(month)

* For each measure, the percent of the May-to-July change already realized by June
local measures students teachers
foreach m of local measures {
	gen closure_share_`m' = 100 * (`m'5 - `m'6)/(`m'5 - `m'7)
}

* Descriptive comparison of the two closure-share measures
foreach m in students teachers {
	summarize closure_share_`m', detail
}
correlate closure_share_students closure_share_teachers

* Use enrollment rates to classify states as having early/mixed/late closures:
*   1 = early: at least 66.67 percent of the May-July enrollment drop occurs by June
*   2 = mixed: at least 33.33 but less than 66.67 percent
*   3 = late:  less than 33.33 percent
* Stata orders missing values above every number, so an undefined share (for example
* when May and July enrollment are equal) would pass a bare >= 66.67 test and be
* labeled early. Each rule therefore excludes missing shares, leaving such states
* with a missing closure_timing.
sort closure_share_students
gen byte closure_timing = .
replace closure_timing = 1 if closure_share_students >= 66.67 & !missing(closure_share_students)
replace closure_timing = 2 if closure_share_students >= 33.33 & closure_share_students < 66.67
replace closure_timing = 3 if closure_share_students <  33.33

* Report (without stopping) how many states could not be classified
count if missing(closure_share_students)
if r(N) > 0 {
	display as text "Note: " r(N) " state(s) have an undefined closure share and are left unclassified"
}

* Value labels; code 0 is defined for pooled results but is not assigned here
label define closure_timing_lbl 0 "Pooled" 1 "Early school closures" 2 "Mixed school closures" 3 "Late school closures", replace
label values closure_timing closure_timing_lbl

* Attach descriptive variable labels
label variable closure_share_teachers "Share of May-July decline in # of teachers reporting to work occurring by June"
label variable closure_share_students "Share of May-July decline in 16-year-old HS enrollment occurring by June"
label variable closure_timing "Classification of states based on high school enrollments"

* Write one row per state, keeping only the state key and the three closure variables
gisid state_fips
local outvars state_fips closure_timing closure_share_students closure_share_teachers
keep `outvars'
order `outvars'
sort state_fips
compress
save "$basepath/data/derived/state_closure_timing.dta", replace

* Close the log file
* NOTE(review): unlaunch is a project-defined command that is not shown in this file;
* presumably it is the counterpart of the launch call at the top of the script -- confirm.
unlaunch
